InĀ [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

1. Read the file¶

InĀ [2]:
#1. بارگذاری ŲÆŲ§ŲÆŁ‡ā€ŒŁ‡Ų§
dff = pd.read_excel('cleardata.xlsx')
InĀ [4]:
dff = dff.rename(columns={'managhe_shahrdari': 'region'})
InĀ [5]:
## ostan tehran 
df = dff[dff.Ostan == "Tehran"]
InĀ [6]:
df
Out[6]:
region masahat price age eskelet date Ostan Shahrestan
0 NaN 60.00 5000.00 10 felezi 1395/01/01 Tehran Pakdasht
1 14.0 70.63 35962.06 1 botoni 1395/01/01 Tehran Tehran
3 2.0 196.16 173327.90 20 felezi 1395/01/01 Tehran Tehran
6 1.0 87.00 34482.77 1 botoni 1395/01/01 Tehran Tehran
8 6.0 108.88 60617.19 39 felezi 1395/01/02 Tehran Tehran
... ... ... ... ... ... ... ... ...
331843 5.0 120.00 45000.00 1 botoni 1395/12/30 Tehran Tehran
331844 10.0 84.54 33120.42 2 botoni and felezi 1395/12/30 Tehran Tehran
331845 16.0 47.95 21897.81 4 felezi 1395/12/30 Tehran Ray
331855 4.0 65.00 32000.00 3 botoni 1395/12/30 Tehran Tehran
331859 NaN 35.00 10000.00 13 felezi 1395/12/30 Tehran Quadruple

184244 rows Ɨ 8 columns

2.Exploring the data¶

InĀ [7]:
df.describe()
Out[7]:
region masahat price age
count 164985.000000 1.842440e+05 1.842430e+05 184244.000000
mean 8.505428 7.399230e+02 4.552438e+04 7.854139
std 5.643888 2.062575e+05 2.650269e+05 9.208918
min 1.000000 1.000000e+00 1.000000e-02 0.000000
25% 4.000000 6.043000e+01 2.375000e+04 1.000000
50% 7.000000 7.677000e+01 3.462604e+04 5.000000
75% 13.000000 1.007000e+02 5.000000e+04 13.000000
max 22.000000 8.840498e+07 7.100000e+07 1309.000000
InĀ [8]:
df.head()
Out[8]:
region masahat price age eskelet date Ostan Shahrestan
0 NaN 60.00 5000.00 10 felezi 1395/01/01 Tehran Pakdasht
1 14.0 70.63 35962.06 1 botoni 1395/01/01 Tehran Tehran
3 2.0 196.16 173327.90 20 felezi 1395/01/01 Tehran Tehran
6 1.0 87.00 34482.77 1 botoni 1395/01/01 Tehran Tehran
8 6.0 108.88 60617.19 39 felezi 1395/01/02 Tehran Tehran
InĀ [9]:
# محاسبه Ś©ŁˆŲ§Ł†ŲŖŲ§ŪŒŁ„ā€ŒŁ‡Ų§ŪŒ Ū²Ū° و ŪøŪ° ŲÆŲ±ŲµŲÆ
q_20 = df['price'].quantile(0.2)
q_80 = df['price'].quantile(0.8)

# ŁŪŒŁ„ŲŖŲ±ŪŒŁ†ŚÆ ŲØŲ± Ų§Ų³Ų§Ų³ Ś©ŁˆŲ§Ł†ŲŖŲ§ŪŒŁ„ā€ŒŁ‡Ų§
df = df[(df['price'] >= q_20) & (df['price'] <= q_80)]
InĀ [10]:
# price / 10,000
df['price'] = df['price']%10000
df.head(2)
/tmp/ipykernel_22977/346169701.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['price'] = df['price']%10000
Out[10]:
region masahat price age eskelet date Ostan Shahrestan
1 14.0 70.63 5962.06 1 botoni 1395/01/01 Tehran Tehran
6 1.0 87.00 4482.77 1 botoni 1395/01/01 Tehran Tehran
InĀ [11]:
df['price'] = np.log(df['price'])
df.head(5)
/home/anjel/.local/lib/python3.11/site-packages/pandas/core/arraylike.py:399: RuntimeWarning: divide by zero encountered in log
  result = getattr(ufunc, method)(*inputs, **kwargs)
/tmp/ipykernel_22977/3109476270.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['price'] = np.log(df['price'])
Out[11]:
region masahat price age eskelet date Ostan Shahrestan
1 14.0 70.63 8.693171 1 botoni 1395/01/01 Tehran Tehran
6 1.0 87.00 8.407996 1 botoni 1395/01/01 Tehran Tehran
21 18.0 64.65 7.582952 10 botoni 1395/01/02 Tehran Tehran
25 11.0 44.02 8.514919 23 felezi 1395/01/03 Tehran Tehran
40 5.0 103.83 7.819897 3 botoni 1395/01/03 Tehran Tehran
InĀ [12]:
df['price'].isin([-np.inf]).sum()
Out[12]:
1773
InĀ [13]:
df = df[~df['price'].isin([-np.inf])]
InĀ [14]:
#check -inf value is clear
df['price'].isin([-np.inf]).sum()
Out[14]:
0
InĀ [15]:
#برررسی Ų¹ŲÆŲÆ Ū° و Ł…Ł†ŁŪŒ ŲÆŲ± Ų³ŲŖŁˆŁ† ها 
count_zero_or_negative1 = (df['masahat'] <= 0).sum()
print(count_zero_or_negative1, 'Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ±  Ł…Ų³Ų§Ų­ŲŖ')

count_zero_or_negative2 = (df['price'] <= 0).sum()
print(count_zero_or_negative2, ' : Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ±  یک Ł…ŲŖŲ± Ł…Ų±ŲØŲ¹')

count_zero_or_negative3 = (df['age'] < 0).sum()
print(count_zero_or_negative3, ' : Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ± سن بنا')
0 Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ±  Ł…Ų³Ų§Ų­ŲŖ
0  : Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ±  یک Ł…ŲŖŲ± Ł…Ų±ŲØŲ¹
0  : Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ± سن بنا
InĀ [16]:
# value <0 is drop
# حذف Ų³Ų·Ų±Ł‡Ų§ŪŒŪŒ که مقدار یکی Ų§Ų² Ų³ŲŖŁˆŁ†ā€ŒŁ‡Ų§ (masahat، gheymat_1_metr_moraba، age_bana) Ł…Ł†ŁŪŒ یا صفر ŲØŲ§Ų“ŲÆ
df = df[(df['masahat'] > 0) & (df['price'] > 0) & (df['age'] > 0)]
InĀ [17]:
#برررسی Ų¹ŲÆŲÆ Ū° و Ł…Ł†ŁŪŒ ŲÆŲ± Ų³ŲŖŁˆŁ† ها 
count_zero_or_negative1 = (df['masahat'] <= 0).sum()
print(count_zero_or_negative1, 'Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ±  Ł…Ų³Ų§Ų­ŲŖ')

count_zero_or_negative2 = (df['price'] <= 0).sum()
print(count_zero_or_negative2, ' Ł‚ŪŒŁ…ŲŖ :')

count_zero_or_negative3 = (df['age'] < 0).sum()
print(count_zero_or_negative3, ' : Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ± سن بنا')
0 Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ±  Ł…Ų³Ų§Ų­ŲŖ
0  Ł‚ŪŒŁ…ŲŖ :
0  : Ų¹ŲÆŲÆ Ł…Ł†ŁŪŒ ŲÆŲ± سن بنا
InĀ [18]:
df.isnull().sum()
Out[18]:
region        1476
masahat          0
price            0
age              0
eskelet          0
date             0
Ostan            0
Shahrestan       0
dtype: int64
InĀ [19]:
df.fillna(0, inplace=True)
df.isnull().sum()
Out[19]:
region        0
masahat       0
price         0
age           0
eskelet       0
date          0
Ostan         0
Shahrestan    0
dtype: int64
InĀ [20]:
df.count()
Out[20]:
region        95427
masahat       95427
price         95427
age           95427
eskelet       95427
date          95427
Ostan         95427
Shahrestan    95427
dtype: int64
InĀ [21]:
df.head(2)
Out[21]:
region masahat price age eskelet date Ostan Shahrestan
1 14.0 70.63 8.693171 1 botoni 1395/01/01 Tehran Tehran
6 1.0 87.00 8.407996 1 botoni 1395/01/01 Tehran Tehran
InĀ [22]:
df.shape
Out[22]:
(95427, 8)
InĀ [23]:
px.histogram(df, x='price')
InĀ [24]:
px.histogram(df, x='masahat')
InĀ [25]:
px.histogram(df, x='age')
InĀ [26]:
px.scatter(df, x='price', y='masahat', 
           labels={"price": "gheymat_yek_metr_moraba bray har saze",
                    "masahat": "masahat_saze"},
           width=350, height=250)
InĀ [27]:
labels = {
    'Ostan': 'استان',
    'price': 'Ł‚ŪŒŁ…ŲŖ'
}

# رسم Ł†Ł…ŁˆŲÆŲ§Ų± area ŲØŲ§ استفاده Ų§Ų² plotly.express
fig = px.area(df, x='Shahrestan', y='price', color='Shahrestan', line_group='Shahrestan', labels=labels, width=600, height=350)

# Ł†Ł…Ų§ŪŒŲ“ Ł†Ł…ŁˆŲÆŲ§Ų±
fig.show()
InĀ [28]:
from itertools import cycle

labels = {
    'Shahrestan': 'ؓهرستان',
    'price': 'Ł‚ŪŒŁ…ŲŖ',
    'Year': 'Ų³Ų§Ł„'
}

# رسم Ł†Ł…ŁˆŲÆŲ§Ų± خطی ŲØŲ§ استفاده Ų§Ų² plotly.express
fig = px.line(df, x='date', y='price', color='Shahrestan', width=600, height=350, labels=labels)

# ŲŖŁ†ŲøŪŒŁ… Ų§Ų³ŲŖŲ§ŪŒŁ„ Ų®Ų·ā€ŒŁ‡Ų§ŪŒ Ł†Ł…ŁˆŲÆŲ§Ų±
styles = cycle([None, 'dashdot', 'dash', 'dot'])
for ostan in df['Shahrestan'].unique():
    fig.update_traces(selector=dict(name=ostan), line=dict(dash=next(styles)))

fig.update_yaxes(title_text='Ł‚ŪŒŁ…ŲŖ (ŲŖŁˆŁ…Ų§Ł†)')
fig.update_xaxes(title_text='Ų³Ų§Ł„')

# Ł†Ł…Ų§ŪŒŲ“ Ł†Ł…ŁˆŲÆŲ§Ų±
fig.show()
InĀ [29]:
# تعریف ŲØŲ±Ś†Ų³ŲØā€ŒŁ‡Ų§ برای Ł†Ł…ŁˆŲÆŲ§Ų±
df_labels = {
    'price': 'Ł‚ŪŒŁ…ŲŖ فروؓ (ŲŖŁˆŁ…Ų§Ł†)',
    'date': 'تاریخ',
    'masahat': 'Ł…Ų³Ų§Ų­ŲŖ (Ł…ŲŖŲ± Ł…Ų±ŲØŲ¹)'
}

# رسم Ł†Ł…ŁˆŲÆŲ§Ų± خطی ŲØŲ§ استفاده Ų§Ų² plotly.express
fig = px.line(df, x='date', y='price', title='Ł‚ŪŒŁ…ŲŖ فروؓ Ł…ŪŒŲ§Ł†Ł‡', labels=df_labels, width=500, height=250)

# Ų§ŁŲ²ŁˆŲÆŁ† Ų­Ų§Ų“ŪŒŁ‡ā€ŒŁ‡Ų§ به Ł†Ł…ŁˆŲÆŲ§Ų±
fig.update_layout(margin=dict(t=30))

# Ł†Ł…Ų§ŪŒŲ“ Ł†Ł…ŁˆŲÆŲ§Ų±
fig.show()
InĀ [30]:
 #رسم Ł†Ł…ŁˆŲÆŲ§Ų± scatter برای دو ویژگی SepalLength و SepalWidth
plt.figure(figsize=(8, 6))
plt.scatter(df['masahat'], df['price'], color='blue', alpha=0.7)
plt.title('masaht vs. price')
plt.xlabel('price')
plt.ylabel('masahat')
plt.grid(True)
plt.show()
No description has been provided for this image
InĀ [31]:
city_counts = df['Ostan'].value_counts()
print(city_counts)
Ostan
Tehran    95427
Name: count, dtype: int64